library(dplyr)
library(ggplot2)
library(readr)
library(sf)
library(raster)
library(tmap)
## here we are going to find the root_file
root <- rprojroot::find_rstudio_root_file()
## we are going to set the data file path here
dataDir <- file.path(root, 'Data')
There are seventeen geometry types supported by the Open Geospatial Consoritum(OGC), but there are seven that are most commonly used. As you can see these are all vector formats, and sf does not handle raster data.
The sf package relies on the rgdal package for reading and writing spatial data and rgeos for spatial operations.
library(spData)
names(world)
## [1] "iso_a2" "name_long" "continent" "region_un" "subregion"
## [6] "type" "area_km2" "pop" "lifeExp" "gdpPercap"
## [11] "geom"
plot(world)
The coordinate reference system is crucial since it ties the vector and raster data types to a location on the earth (or other bodies). To get this reference system, they rely on geodetic datums which are reference points located all around the earth. The most common datums we use ar the WGS 84 and NAD 83. Because the earth is not completely smooth nor sphereical, the common datums that we use approximate an ellipsoid, which is due to the fact the earth’s rotation and effects of gravity flatten the poles while the equator slightly bulges. Many localities have specific datusm or elliposoid models to use and in the US the NAD 83 is very common. This data will me located in the ellps parameter of the PROJ CRS library.
Here are the important features when modeling the earth. - Actual Earth’s surface: has mountains and oceans. - Geoid: the shape that the ocean surface would take under the influence of the gravity and rotation of Earth alone, if other influences such as winds and tides were absent. - ellipsoid: The approximation of the Earth’s surface into a more regular shape.
Also when you are working with multiple files with different CRSs you will have to transform them to a common CRS or they will not align. You can think of this as trying to do any calculation with data with different units.
Geographic coordinate systems use angular measurements to locate a place on the surface of the earth. These measures are called Latitude and Longitude, they are a measure from the center of earth to the location on the surface. Longitude is the E-W distance from the Prime Meridian plane, while latitude is the N-S distance from the equatorial plane.
One important aspect to highlight is that these are angular measurements. This means that it is not a distance like meters or miles and there fore a distance calculation is not straight forward since we are still talk about a curved surface of the earth and its location.
Projected coordinate systems are based on Cartesian coordinates if you recall making graphs in math classes with x and y locaitons. Unlike the angular measurements of the geographic coordinate systems, these are linear measusrements like meters and feet. Projected coordinate systems are based on geographic coordinate systems but take the 3D object of the earth and project it into 2D.
This projection will always cause some sort of distortion when you go from a 3D object to a 2D. The classic example is trying to flatten a orange peel, you can’t do that with out tearing the skin, flattening and stretching it out.
There are certain properties that we want to accurately capture when we are working with mapping, these are area, distance, direction, and shape. A projected coordinate can only preserve one or two properties, because of this the map maker must think about the trade-offs being made when projecting the earth on to a 2D map. This is apparent when you think about the debate betwen the “Euro-centric” Mercator projection that we have all become accostumed to compared with the Peters Projection that accurately displays areas.
The last thing to cover is the three main groups of projections types which are conic, cylindrical and planar or azimuths. As you can probably infer by their names these are the shapes that are being used to capture the projection, if you think of earth with a light bulb in the middle and place these objects on the surface of the earth, you can see that the earths surface would be projected on the shape. Distortions are minimized where the projectino shape is tangent with the earths surface and greater the further you get from it. These shapes either can have a single line of tangency or two lines of tangency also.
You can work with the CRS of geographic data with epsg codes or proj4string defintions. epsg codes are shorter and might be easier to remember while the proj4string will allow you more flexibility when dictating projections type, datum, and ellpsoid parameters. epsg only referes to one specific CRS which does not allow you to change different parameters.
nybb <- read_sf(file.path(dataDir, "gis/nybb"))
st_crs(nybb)
## Coordinate Reference System:
## No EPSG code
## proj4string: "+proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs"
centralPark = data.frame(lon = -73.9691305, lat = 40.7764627) %>%
st_as_sf(coords = c("lon", "lat"))
st_is_longlat(centralPark)
## [1] NA
centralPark_geo = st_set_crs(centralPark, 4326)
st_is_longlat(centralPark_geo)
## [1] TRUE
Here you can see that only the second dataset created a warning. We are trying to run a distance based function, creating a buffer around a data with latitude and longitude.
centralPark_buff_no_crs = st_buffer(centralPark, dist = 1)
cebtralPark_buff = st_buffer(centralPark_geo, dist = 1)
tm_shape(centralPark) + tm_symbols() +
tm_shape(nybb) + tm_polygons()
Area_Albers <- 102008
Area_Equidistant <- 102005
Area_Lambert <- 102004
Area_NYStatePlane <- 2263
projections <- setNames(c(Area_Albers, Area_Lambert,Area_Equidistant, Area_NYStatePlane),
c("Area_Albers", "Area_Lambert","Area_Equidistant", "Area_NYStatePlane")) %>%
as.list()
1 square meter = 10.7639 square feet.
massReproject <- function(shape, projection){
# we have to reporject and recalulate the area
st_transform(shape, projection) %>% st_area()
}
areaCalc <- purrr::map_df(projections, ~massReproject(nybb, .x)) %>%
mutate_at(vars(Area_Albers, Area_Equidistant, Area_Lambert), function(x) x*10.7639) %>%
mutate(BoroName = nybb$BoroName) %>%
mutate(Albers_Diff = Area_Albers-Area_Albers,
Lambert_Diff = Area_Albers- Area_Lambert,
Equidistant_Diff = Area_Albers-Area_Equidistant,
NYStatePlane_Diff = Area_Albers-Area_NYStatePlane)
areaCalc %>% dplyr::select(5:9) %>%
tidyr::gather(projection, difference, -BoroName) %>%
ggplot(aes(projection, difference, fill = difference)) +
geom_col() +
coord_flip()+
facet_wrap(~BoroName, scales = "free") +
theme_minimal()
the sf library is great because it extends the data.frame and adds geographic features to it. Much like the tidy data paradigm, each row is still an observations and each column is a feature. The main difference between a normal data.frame and an sf object is that there is another column geometry baked in which can contain a multitude of geographic types like points, lines and polygons.
You will see that a lot of methods that you are used to working with in data.frames will apply to sf objects. Like rbind, $<-, and cbind.
methods(class = "sf")
## [1] [ [[<- $<-
## [4] aggregate anti_join arrange
## [7] as.data.frame cbind coerce
## [10] dbDataType dbWriteTable distinct
## [13] extent extract filter
## [16] full_join group_by identify
## [19] initialize inner_join left_join
## [22] mask merge mutate
## [25] plot print raster
## [28] rasterize rbind rename
## [31] right_join sample_frac sample_n
## [34] select semi_join show
## [37] slice slotsFromS3 st_agr
## [40] st_agr<- st_area st_as_sf
## [43] st_bbox st_boundary st_buffer
## [46] st_cast st_centroid st_collection_extract
## [49] st_convex_hull st_coordinates st_crop
## [52] st_crs st_crs<- st_difference
## [55] st_geometry st_geometry<- st_intersection
## [58] st_is st_line_merge st_nearest_points
## [61] st_node st_point_on_surface st_polygonize
## [64] st_precision st_segmentize st_set_precision
## [67] st_simplify st_snap st_sym_difference
## [70] st_transform st_triangulate st_union
## [73] st_voronoi st_wrap_dateline st_write
## [76] st_zm summarise transmute
## [79] ungroup
## see '?methods' for accessing help and source code
If we ever wanted to go back to a normal data.frame object it’s very simple
st_drop_geometry(nybb) %>% class()
## [1] "tbl_df" "tbl" "data.frame"
nybb[1,]
## Simple feature collection with 1 feature and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 971013.5 ymin: 188082.3 xmax: 1010066 ymax: 259547.8
## epsg (SRID): NA
## proj4string: +proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs
## # A tibble: 1 x 5
## BoroCode BoroName Shape_Leng Shape_Area geometry
## <int> <chr> <dbl> <dbl> <MULTIPOLYGON [US_survey_foot]>
## 1 1 Manhattan 361650. 636600558. (((981219.1 188655.3, 980940.5 …
nybb[,c(2,4)]
## Simple feature collection with 5 features and 2 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 913175.1 ymin: 120121.9 xmax: 1067383 ymax: 272844.3
## epsg (SRID): NA
## proj4string: +proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs
## # A tibble: 5 x 3
## BoroName Shape_Area geometry
## <chr> <dbl> <MULTIPOLYGON [US_survey_foot]>
## 1 Manhattan 636600558. (((981219.1 188655.3, 980940.5 188435.4, 980873 …
## 2 Bronx 1186615449. (((1012822 229228.3, 1012786 229165.3, 1012704 2…
## 3 Staten Isl… 1623920682. (((970217 145643.3, 970227.2 145641.6, 970273.9 …
## 4 Brooklyn 1937566944. (((1021176 151374.8, 1021003 151329.1, 1020875 1…
## 5 Queens 3044771591. (((1029606 156073.8, 1029578 156052.2, 1029581 1…
nybb %>% slice(1:2)
## Simple feature collection with 2 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 913175.1 ymin: 120121.9 xmax: 1067383 ymax: 272844.3
## epsg (SRID): NA
## proj4string: +proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs
## # A tibble: 2 x 5
## BoroCode BoroName Shape_Leng Shape_Area geometry
## <int> <chr> <dbl> <dbl> <MULTIPOLYGON [US_survey_foot]>
## 1 1 Manhattan 361650. 6.37e8 (((981219.1 188655.3, 980940.5 …
## 2 2 Bronx 463465. 1.19e9 (((1012822 229228.3, 1012786 22…
nybb %>% dplyr::select(2)
## Simple feature collection with 5 features and 1 field
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 913175.1 ymin: 120121.9 xmax: 1067383 ymax: 272844.3
## epsg (SRID): NA
## proj4string: +proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs
## # A tibble: 5 x 2
## BoroName geometry
## <chr> <MULTIPOLYGON [US_survey_foot]>
## 1 Manhattan (((981219.1 188655.3, 980940.5 188435.4, 980873 188511, 981…
## 2 Bronx (((1012822 229228.3, 1012786 229165.3, 1012704 229195.9, 10…
## 3 Staten Isla… (((970217 145643.3, 970227.2 145641.6, 970273.9 145641.6, 9…
## 4 Brooklyn (((1021176 151374.8, 1021003 151329.1, 1020875 151338.2, 10…
## 5 Queens (((1029606 156073.8, 1029578 156052.2, 1029581 156031.3, 10…
nybb %>% filter(Shape_Area >= 1937566944)
## Simple feature collection with 2 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 972619.7 ymin: 136686.8 xmax: 1067383 ymax: 231158
## epsg (SRID): NA
## proj4string: +proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +datum=NAD83 +units=us-ft +no_defs
## # A tibble: 2 x 5
## BoroCode BoroName Shape_Leng Shape_Area geometry
## <int> <chr> <dbl> <dbl> <MULTIPOLYGON [US_survey_foot]>
## 1 3 Brooklyn 739945. 1937566944. (((1021176 151374.8, 1021003 15…
## 2 4 Queens 895229. 3044771591. (((1029606 156073.8, 1029578 15…
library(rvest)
demographics <- read_html("https://en.wikipedia.org/wiki/Demographics_of_New_York_City") %>%
html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[1]') %>%
html_table(header = FALSE) %>% "[["(1) %>% slice(4:8) %>%
mutate(X1 = c("Bronx", "Brooklyn", "Manhattan", "Queens", "Staten Island")) %>%
dplyr::select(c(1,3:5))
names(demographics) <- c("BoroName", "population",
"GDP", "GDPperCapita")
nybb_demo <- nybb %>% left_join(demographics) %>%
mutate_at(vars(population, GDP, GDPperCapita), function(x) stringr::str_remove_all(x, ",") %>% as.numeric())
plot(nybb_demo["population"])
jul14 <- read_csv("https://raw.githubusercontent.com/fivethirtyeight/uber-tlc-foil-response/master/uber-trip-data/uber-raw-data-jul14.csv")
uberJuly <- st_as_sf(jul14, coords = c("Lon", "Lat"),crs = 4326)
set.seed(1234)
uberJuly <- st_transform(uberJuly, 2263) %>% sample_frac(0.2)
set.seed(1234)
uberSamp <- uberJuly %>% sample_n(100)
tm_shape(nybb) + tm_polygons()+
tm_shape(uberSamp) + tm_symbols(size = .2, col = "white")
st_crs(nybb) <- 2263
st_intersects(uberSamp, nybb)
## Sparse geometry binary predicate list of length 100, where the predicate was `intersects'
## first 10 elements:
## 1: 1
## 2: 1
## 3: 1
## 4: 1
## 5: 4
## 6: 1
## 7: 4
## 8: 1
## 9: 1
## 10: 1
st_intersects(uberSamp, nybb, sparse = FALSE)
## [,1] [,2] [,3] [,4] [,5]
## [1,] TRUE FALSE FALSE FALSE FALSE
## [2,] TRUE FALSE FALSE FALSE FALSE
## [3,] TRUE FALSE FALSE FALSE FALSE
## [4,] TRUE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE TRUE FALSE
## [6,] TRUE FALSE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE TRUE FALSE
## [8,] TRUE FALSE FALSE FALSE FALSE
## [9,] TRUE FALSE FALSE FALSE FALSE
## [10,] TRUE FALSE FALSE FALSE FALSE
## [11,] TRUE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE TRUE FALSE
## [13,] TRUE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE TRUE FALSE
## [15,] FALSE FALSE FALSE TRUE FALSE
## [16,] TRUE FALSE FALSE FALSE FALSE
## [17,] TRUE FALSE FALSE FALSE FALSE
## [18,] TRUE FALSE FALSE FALSE FALSE
## [19,] TRUE FALSE FALSE FALSE FALSE
## [20,] TRUE FALSE FALSE FALSE FALSE
## [21,] TRUE FALSE FALSE FALSE FALSE
## [22,] TRUE FALSE FALSE FALSE FALSE
## [23,] TRUE FALSE FALSE FALSE FALSE
## [24,] TRUE FALSE FALSE FALSE FALSE
## [25,] TRUE FALSE FALSE FALSE FALSE
## [26,] TRUE FALSE FALSE FALSE FALSE
## [27,] TRUE FALSE FALSE FALSE FALSE
## [28,] TRUE FALSE FALSE FALSE FALSE
## [29,] TRUE FALSE FALSE FALSE FALSE
## [30,] TRUE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE TRUE
## [32,] TRUE FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE TRUE
## [35,] TRUE FALSE FALSE FALSE FALSE
## [36,] TRUE FALSE FALSE FALSE FALSE
## [37,] TRUE FALSE FALSE FALSE FALSE
## [38,] TRUE FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE TRUE FALSE
## [40,] TRUE FALSE FALSE FALSE FALSE
## [41,] TRUE FALSE FALSE FALSE FALSE
## [42,] TRUE FALSE FALSE FALSE FALSE
## [43,] TRUE FALSE FALSE FALSE FALSE
## [44,] FALSE FALSE FALSE TRUE FALSE
## [45,] FALSE FALSE FALSE FALSE FALSE
## [46,] FALSE FALSE FALSE TRUE FALSE
## [47,] FALSE FALSE FALSE FALSE TRUE
## [48,] TRUE FALSE FALSE FALSE FALSE
## [49,] TRUE FALSE FALSE FALSE FALSE
## [50,] TRUE FALSE FALSE FALSE FALSE
## [51,] TRUE FALSE FALSE FALSE FALSE
## [52,] TRUE FALSE FALSE FALSE FALSE
## [53,] TRUE FALSE FALSE FALSE FALSE
## [54,] TRUE FALSE FALSE FALSE FALSE
## [55,] TRUE FALSE FALSE FALSE FALSE
## [56,] TRUE FALSE FALSE FALSE FALSE
## [57,] TRUE FALSE FALSE FALSE FALSE
## [58,] TRUE FALSE FALSE FALSE FALSE
## [59,] TRUE FALSE FALSE FALSE FALSE
## [60,] TRUE FALSE FALSE FALSE FALSE
## [61,] TRUE FALSE FALSE FALSE FALSE
## [62,] FALSE FALSE FALSE FALSE TRUE
## [63,] TRUE FALSE FALSE FALSE FALSE
## [64,] FALSE FALSE FALSE TRUE FALSE
## [65,] TRUE FALSE FALSE FALSE FALSE
## [66,] TRUE FALSE FALSE FALSE FALSE
## [67,] TRUE FALSE FALSE FALSE FALSE
## [68,] TRUE FALSE FALSE FALSE FALSE
## [69,] TRUE FALSE FALSE FALSE FALSE
## [70,] TRUE FALSE FALSE FALSE FALSE
## [71,] TRUE FALSE FALSE FALSE FALSE
## [72,] TRUE FALSE FALSE FALSE FALSE
## [73,] TRUE FALSE FALSE FALSE FALSE
## [74,] TRUE FALSE FALSE FALSE FALSE
## [75,] FALSE FALSE FALSE FALSE TRUE
## [76,] TRUE FALSE FALSE FALSE FALSE
## [77,] FALSE FALSE FALSE TRUE FALSE
## [78,] TRUE FALSE FALSE FALSE FALSE
## [79,] TRUE FALSE FALSE FALSE FALSE
## [80,] TRUE FALSE FALSE FALSE FALSE
## [81,] TRUE FALSE FALSE FALSE FALSE
## [82,] TRUE FALSE FALSE FALSE FALSE
## [83,] TRUE FALSE FALSE FALSE FALSE
## [84,] FALSE FALSE FALSE FALSE TRUE
## [85,] TRUE FALSE FALSE FALSE FALSE
## [86,] FALSE FALSE FALSE TRUE FALSE
## [87,] FALSE FALSE FALSE FALSE TRUE
## [88,] TRUE FALSE FALSE FALSE FALSE
## [89,] TRUE FALSE FALSE FALSE FALSE
## [90,] TRUE FALSE FALSE FALSE FALSE
## [91,] FALSE FALSE FALSE TRUE FALSE
## [92,] TRUE FALSE FALSE FALSE FALSE
## [93,] FALSE FALSE FALSE TRUE FALSE
## [94,] TRUE FALSE FALSE FALSE FALSE
## [95,] TRUE FALSE FALSE FALSE FALSE
## [96,] TRUE FALSE FALSE FALSE FALSE
## [97,] FALSE TRUE FALSE FALSE FALSE
## [98,] TRUE FALSE FALSE FALSE FALSE
## [99,] FALSE FALSE FALSE FALSE TRUE
## [100,] TRUE FALSE FALSE FALSE FALSE
st_disjoint(uberSamp, nybb)
## Sparse geometry binary predicate list of length 100, where the predicate was `disjoint'
## first 10 elements:
## 1: 2, 3, 4, 5
## 2: 2, 3, 4, 5
## 3: 2, 3, 4, 5
## 4: 2, 3, 4, 5
## 5: 1, 2, 3, 5
## 6: 2, 3, 4, 5
## 7: 1, 2, 3, 5
## 8: 2, 3, 4, 5
## 9: 2, 3, 4, 5
## 10: 2, 3, 4, 5
st_within(uberSamp, nybb)
## Sparse geometry binary predicate list of length 100, where the predicate was `within'
## first 10 elements:
## 1: 1
## 2: 1
## 3: 1
## 4: 1
## 5: 4
## 6: 1
## 7: 4
## 8: 1
## 9: 1
## 10: 1
sel = st_is_within_distance(uberSamp, nybb, dist = 100)
lengths(sel) > 0
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [12] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [23] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
## [34] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [45] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [56] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [67] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [78] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [89] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [100] TRUE
sgbp is a sparse geometry binary predicate
sel_nycUber = st_intersects(x = uberJuly, y = nybb)
class(sel_nycUber)
## [1] "sgbp"
#> [1] "sgbp"
sel_logical = lengths(sel_nycUber) > 0
nycUber = uberJuly[sel_logical, ]
set.seed(1234)
nycUberSamp <- nycUber %>% sample_n(100)
tm_shape(nybb) + tm_polygons()+
tm_shape(nycUberSamp) + tm_symbols(size = .2, col = "white")
st_crs(nybb_demo) <- 2263
uberJoin <- st_join(nycUber, nybb_demo)
tm_shape(nybb) + tm_polygons(col = "white")+
tm_shape(uberJoin) + tm_symbols(size = .2, col = "BoroName",
alpha = 0.4)
uberJuly %>%
mutate(x = rnorm(n = 100 )) %>% aggregate(by = nybb, FUN = mean)
## Simple feature collection with 5 features and 3 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 913175.1 ymin: 120121.9 xmax: 1067383 ymax: 272844.3
## epsg (SRID): 2263
## proj4string: +proj=lcc +lat_1=41.03333333333333 +lat_2=40.66666666666666 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000.0000000001 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
## Date/Time Base x geometry
## 1 NA NA -0.23692334 MULTIPOLYGON (((981219.1 18...
## 2 NA NA -0.63554541 MULTIPOLYGON (((1012822 229...
## 3 NA NA NA MULTIPOLYGON (((970217 1456...
## 4 NA NA -0.33565488 MULTIPOLYGON (((1021176 151...
## 5 NA NA -0.06002476 MULTIPOLYGON (((1029606 156...
When we were looking at topological realationships, the results were only binary, either they intersect or does not, etc…
nybb_centroid = st_centroid(nybb)
st_distance(uberJuly[1,], nybb_centroid)
## Units: [US_survey_foot]
## [,1] [,2] [,3] [,4] [,5]
## [1,] 9944.048 34682.15 92698 46392.09 39162.31
st_distance(uberJuly[1:10,], nybb_centroid)
## Units: [US_survey_foot]
## [,1] [,2] [,3] [,4] [,5]
## [1,] 9944.048 34682.15 92698.00 46392.09 39162.31
## [2,] 14350.173 53452.22 74340.71 42129.59 54448.37
## [3,] 8917.171 46905.41 80096.26 40288.51 46351.51
## [4,] 70408.401 79187.86 105732.47 45977.80 24943.76
## [5,] 27669.720 65513.16 61648.24 27816.79 51850.71
## [6,] 12506.557 30308.54 99389.79 60813.61 54772.46
## [7,] 20234.865 59144.45 68011.32 36023.21 53597.97
## [8,] 37911.924 70336.98 63236.30 11013.08 41343.10
## [9,] 4236.399 42844.71 85017.06 48160.04 51421.10
## [10,] 21462.723 59884.26 67111.24 32900.19 51396.36
plot(st_geometry(nybb_centroid))
plot(st_geometry(uberJuly)[1:10], add = TRUE, col = "red")
nyc_simp <- st_simplify(nybb, dTolerance = 500)
plot(nyc_simp["BoroName"])
plot(nybb["BoroName"])
object.size(nybb)
## 1250752 bytes
object.size(nyc_simp)
## 35232 bytes
nyc_ms = rmapshaper::ms_simplify(nybb, keep = 0.50,
keep_shapes = TRUE)
nybb_centroid = st_centroid(nybb)
tm_shape(nybb) + tm_polygons()+
tm_shape(nybb_centroid) + tm_symbols()
subway <- read_sf(file.path(dataDir, "gis", "SubwayLines")) %>%
st_transform(2263)
tm_shape(nybb) + tm_polygons(col ="white")+
tm_shape(subway) + tm_lines()
subwayRT <- subway %>% pull(rt_symbol) %>% unique()
splitSubway <- subway %>% tidyr::nest(-rt_symbol) %>%
dplyr::mutate(combine = purrr::map(data, st_combine))
subwayLines <- splitSubway$combine %>% purrr::reduce(c) %>%
st_sf() %>% mutate(lines = subwayRT)
subwayCentroids <- sf::st_centroid(subwayLines)
subwaysurface <- st_point_on_surface(subwayLines)
tm_shape(nybb %>% filter(BoroName != "Staten Island")) + tm_polygons(col = "white")+
tm_shape(subwayLines) + tm_lines(col = "lines") +
tm_shape(subwayCentroids) + tm_symbols(col = "lines", size =0.5, shape = 3)+
tm_shape(subwaysurface) + tm_symbols(col = "lines", size = 0.5, shape = 5)
subwayQuarterBuffer = st_buffer(subwayLines, dist = 1320)
subwayQuarterBuffer
## Simple feature collection with 9 features and 1 field
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: 974358.1 ymin: 147659.2 xmax: 1053489 ymax: 269666
## epsg (SRID): 2263
## proj4string: +proj=lcc +lat_1=41.03333333333333 +lat_2=40.66666666666666 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000.0000000001 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
## lines geometry
## 1 G POLYGON ((984516.5 183753.8...
## 2 N POLYGON ((975106.4 166532, ...
## 3 B POLYGON ((981862.3 178282.2...
## 4 L MULTIPOLYGON (((1028583 151...
## 5 A POLYGON ((981387.2 196920.9...
## 6 7 POLYGON ((985095.9 214415.8...
## 7 J POLYGON ((980086.2 197432.1...
## 8 1 POLYGON ((979174.4 197454.1...
## 9 4 POLYGON ((979130.8 196564.9...
tm_shape(nybb %>% filter(BoroName != "Staten Island")) + tm_polygons(col = "white") +
tm_shape(subwayQuarterBuffer) + tm_polygons(col = "lines", alpha = 0.2)
sel_subwayUber = st_intersects(x = uberJuly, y = subwayQuarterBuffer)
sel_subwayUber = lengths(sel_subwayUber) > 0
subwayUber = uberJuly[sel_subwayUber, ]
tm_shape(nybb) + tm_polygons()+
tm_shape(subwayUber) + tm_symbols(size = .2, col = "white")
Inverse choice
sel_subwayUber = st_intersects(x = uberJuly, y = subwayQuarterBuffer)
sel_subwayUber = lengths(sel_subwayUber) < 1
subwayUber = uberJuly[sel_subwayUber, ]
tm_shape(nybb) + tm_polygons()+
tm_shape(subwayUber) + tm_symbols(size = .2, col = "white")
let’s rasterize the uber data
uberSP <- as(nycUber %>% dplyr::select(-`Date/Time`,-Base), "Spatial")
uberSP = spTransform(uberSP, CRS("+init=epsg:2263"))
rast <- raster(crs = CRS("+init=epsg:2263"))
extent(rast) <- extent(uberSP)
ncol(rast) <- 50
nrow(rast) <- 50
rast2 <- rasterize(uberSP, rast, fun=function(x,...)length(x))
plot(rast2)
uberSamp$density <- raster::extract(rast2, uberSamp)
tm_shape(nybb) + tm_polygons(col = "white")+
tm_shape(uberSamp) + tm_symbols(size = .2, col = "density")